#Dennis Moskov, Master Thesis
#Split the data by unique article number and run k-fold cross-validation (CV)

#expects an object `DB` to already exist, with the article number in its first column

#number of cross-validation folds
k <- 10

#shuffle the rows of DB; the fixed seed makes the permutation reproducible
set.seed(77)
DBs <- DB[sample(nrow(DB)), ]

#unique article numbers (first column of the shuffled data),
#in the order in which they first appear after shuffling
ref.num.CV <- unique(DBs[, 1])

#working copy of the shuffled data
useDB <- DBs

#build folds with unique article numbers
#Every row of an article receives the same fold id (1..k), so an article never
#ends up on both sides of a train/test split. The fold id replaces the article
#number in column 1 of useDB; DBs keeps the original article numbers.
n.articles <- length(ref.num.CV)
if (n.articles < k) {
  #with fewer articles than folds some folds would be empty
  stop("cannot build ", k, " folds from only ", n.articles,
       " unique article numbers", call. = FALSE)
}

#position in ref.num.CV at which each fold starts; fold j runs from its start
#up to the position just before the start of fold j+1, the last fold runs to
#the end of ref.num.CV
fold.start <- trunc(n.articles / k * (seq_len(k) - 1) + 1)

#fold id of every unique article = number of fold starts at or before it
article.fold <- findInterval(seq_len(n.articles), fold.start)

#look up each row's article and replace the whole column in one step; this
#also works when the article numbers are stored as character or factor
useDB[, 1] <- article.fold[match(DBs[, 1], ref.num.CV)]

#perform k fold cross validation
#In iteration i the rows of fold i form the test set and all remaining rows
#form the training set.
for (i in seq_len(k)) {
    #segment data by fold
    testIndexes <- which(useDB[, 1] == i)          #rows belonging to fold i
    #every other row; setdiff() is used instead of useDB[-testIndexes, ]
    #because a negative index built from an empty vector selects no rows
    trainIndexes <- setdiff(seq_len(nrow(useDB)), testIndexes)
    testData <- useDB[testIndexes, ]               #test data of fold i
    testData <- testData[-1]			#remove fold column
    trainData <- useDB[trainIndexes, ]             #training data of fold i
    trainData <- trainData[-1]			#remove fold column

#add model and evaluation here

}


















